## Analysis of daily plate reader measurements of populations evolving in galactose (Evolution Experiment #25 analysis)

# # evo-cassette Locus E with and without IS deletion ("deltaIS1C" and "(IT0)30", resp.);

# Locus E with and without IS deletion (deltaIS1C::FRT = IT049 background; IS+= MS022 background; both camR); 
#each with 4 different random non-promoter sequences: "p0, p0-1, p0-2, p0-3"

#file naming:
#e.g.: EE25_22-0_OD.txt = evolution experiment 25_IS+ strain (MS022 background)_p0_optical density
#e.g.: EE25_49-2_YFP2.txt = evolution experiment 25_IS- strain (IT049 background)_p0-2_YFP intensity

#Figure plots indicated as such.

#########################################################################################

######read in data
# All relative file names used below are resolved against this directory.
setwd("/Users/itomanek/Documents/promoter_evolution/experiments/platereader_data/2020_June/EE25")

# Read one tab-separated export that has a header row. Apart from `header`
# and `sep`, read.csv2() runs with its defaults (decimal mark ",").
read_reader_table <- function(file_name) {
  read.csv2(file_name, header = TRUE, sep = "\t")
}

# IS+ background (MS022, E::ECp0-3): OD, YFP and CFP table per p0 variant
OD_B22_0 <- read_reader_table("EE25_22-0_OD.txt") # p0_0
YFP_B22_0 <- read_reader_table("EE25_22-0_YFP2.txt")
CFP_B22_0 <- read_reader_table("EE25_22-0_CFP2.txt")
OD_B22_1 <- read_reader_table("EE25_22-1_OD.txt") # p0_1
YFP_B22_1 <- read_reader_table("EE25_22-1_YFP2.txt")
CFP_B22_1 <- read_reader_table("EE25_22-1_CFP2.txt")
OD_B22_2 <- read_reader_table("EE25_22-2_OD.txt") # p0_2
YFP_B22_2 <- read_reader_table("EE25_22-2_YFP2.txt")
CFP_B22_2 <- read_reader_table("EE25_22-2_CFP2.txt")
OD_B22_3 <- read_reader_table("EE25_22-3_OD.txt") # p0_3
YFP_B22_3 <- read_reader_table("EE25_22-3_YFP2.txt")
CFP_B22_3 <- read_reader_table("EE25_22-3_CFP2.txt")

# IS- background: same layout of files, strain tag 49
OD_B49_0 <- read_reader_table("EE25_49-0_OD.txt") # p0_0
YFP_B49_0 <- read_reader_table("EE25_49-0_YFP2.txt")
CFP_B49_0 <- read_reader_table("EE25_49-0_CFP2.txt")
OD_B49_1 <- read_reader_table("EE25_49-1_OD.txt") # p0_1
YFP_B49_1 <- read_reader_table("EE25_49-1_YFP2.txt")
CFP_B49_1 <- read_reader_table("EE25_49-1_CFP2.txt")
OD_B49_2 <- read_reader_table("EE25_49-2_OD.txt") # p0_2
YFP_B49_2 <- read_reader_table("EE25_49-2_YFP2.txt")
CFP_B49_2 <- read_reader_table("EE25_49-2_CFP2.txt")
OD_B49_3 <- read_reader_table("EE25_49-3_OD.txt") # p0_3
YFP_B49_3 <- read_reader_table("EE25_49-3_YFP2.txt")
CFP_B49_3 <- read_reader_table("EE25_49-3_CFP2.txt")

# ctr - all 8 (MS022, IT049 with 4 different p0s)
OD_E <- read_reader_table("EE25_E_OD.txt")
YFP_E <- read_reader_table("EE25_E_YFP2.txt")
CFP_E <- read_reader_table("EE25_E_CFP2.txt")
#############################################################

##########rearrange the data so that each row contains the value for one 
#read of one well
library(reshape2)

#With melt, you specify which columns are identity variables, 
#and which columns are measured variables
#od
# Turn a wide table into long format: `Time` stays an identifier, every other
# column becomes one level of `Well`, and the readings are stored in a column
# named by `reading_name`.
to_long_reads <- function(wide_table, reading_name) {
  melt(wide_table, id = c("Time"), variable.name = "Well", value.name = reading_name)
}

reshaped_B22_0 <- to_long_reads(OD_B22_0, "OD600")
reshaped_B22_1 <- to_long_reads(OD_B22_1, "OD600")
reshaped_B22_2 <- to_long_reads(OD_B22_2, "OD600")
reshaped_B22_3 <- to_long_reads(OD_B22_3, "OD600")
reshaped_B49_0 <- to_long_reads(OD_B49_0, "OD600")
reshaped_B49_1 <- to_long_reads(OD_B49_1, "OD600")
reshaped_B49_2 <- to_long_reads(OD_B49_2, "OD600")
reshaped_B49_3 <- to_long_reads(OD_B49_3, "OD600")
reshaped_E <- to_long_reads(OD_E, "OD600")
# yfp
yfp_reshaped_B22_0 <- to_long_reads(YFP_B22_0, "YFP")
yfp_reshaped_B22_1 <- to_long_reads(YFP_B22_1, "YFP")
yfp_reshaped_B22_2 <- to_long_reads(YFP_B22_2, "YFP")
yfp_reshaped_B22_3 <- to_long_reads(YFP_B22_3, "YFP")
yfp_reshaped_B49_0 <- to_long_reads(YFP_B49_0, "YFP")
yfp_reshaped_B49_1 <- to_long_reads(YFP_B49_1, "YFP")
yfp_reshaped_B49_2 <- to_long_reads(YFP_B49_2, "YFP")
yfp_reshaped_B49_3 <- to_long_reads(YFP_B49_3, "YFP")
yfp_reshaped_E <- to_long_reads(YFP_E, "YFP")
# cfp
cfp_reshaped_B22_0 <- to_long_reads(CFP_B22_0, "CFP")
cfp_reshaped_B22_1 <- to_long_reads(CFP_B22_1, "CFP")
cfp_reshaped_B22_2 <- to_long_reads(CFP_B22_2, "CFP")
cfp_reshaped_B22_3 <- to_long_reads(CFP_B22_3, "CFP")
cfp_reshaped_B49_0 <- to_long_reads(CFP_B49_0, "CFP")
cfp_reshaped_B49_1 <- to_long_reads(CFP_B49_1, "CFP")
cfp_reshaped_B49_2 <- to_long_reads(CFP_B49_2, "CFP")
cfp_reshaped_B49_3 <- to_long_reads(CFP_B49_3, "CFP")
cfp_reshaped_E <- to_long_reads(CFP_E, "CFP")


########## convert the plate reader time stamps (format 00:00:00) to days
# The stamps of the first long OD table are parsed once and the result is
# written into every long table below. This relies on all tables having the
# same row order and row count as reshaped_B22_0.
time <- as.character(reshaped_B22_0[, 1])
# (first field + second field / 60) / 24, i.e. hours and minutes expressed in
# days; the third field (seconds) is ignored.
# vapply() instead of sapply(): the result is always a plain numeric vector,
# also for empty input.
time <- vapply(
  strsplit(time, ":"),
  function(fields) {
    fields <- as.numeric(fields)
    (fields[1] + fields[2] / 60) / 24
  },
  numeric(1)
)
time <- round(time, 2)
time # in days, rounded to two decimals

#######

# replace the original time stamps with the time in days (OD tables)
reshaped_B22_0$Time <- time
reshaped_B22_1$Time <- time
reshaped_B22_2$Time <- time
reshaped_B22_3$Time <- time
reshaped_B49_0$Time <- time
reshaped_B49_1$Time <- time
reshaped_B49_2$Time <- time
reshaped_B49_3$Time <- time
reshaped_E$Time <- time

# same replacement for the YFP tables
yfp_reshaped_B22_0$Time <- time
yfp_reshaped_B22_1$Time <- time
yfp_reshaped_B22_2$Time <- time
yfp_reshaped_B22_3$Time <- time
yfp_reshaped_B49_0$Time <- time
yfp_reshaped_B49_1$Time <- time
yfp_reshaped_B49_2$Time <- time
yfp_reshaped_B49_3$Time <- time
yfp_reshaped_E$Time <- time

# same replacement for the CFP tables
cfp_reshaped_B22_0$Time <- time
cfp_reshaped_B22_1$Time <- time
cfp_reshaped_B22_2$Time <- time
cfp_reshaped_B22_3$Time <- time
cfp_reshaped_B49_0$Time <- time
cfp_reshaped_B49_1$Time <- time
cfp_reshaped_B49_2$Time <- time
cfp_reshaped_B49_3$Time <- time
cfp_reshaped_E$Time <- time

###### PLATE INFO (TEMPLATE)  ########

#read in the plate template - i.e. metadata, additional info for E
# Plate layout (per-well metadata) for the control plate E.
plate_info <- read.csv2("plate_layout_E.txt", header = TRUE, sep = "\t")

# format of plate_template: well, strain
#head(plate_info)

#install.packages("dplyr")  ##info: https://cran.rstudio.com/web/packages/dplyr/vignettes/introduction.html
library("dplyr")

# Combine the long OD, YFP and CFP tables of one plate into a single table,
# matching rows on Time and Well (inner joins: unmatched rows are dropped).
combine_reads <- function(od_long, yfp_long, cfp_long) {
  od_yfp <- inner_join(od_long, yfp_long, by = c("Time", "Well"))
  inner_join(od_yfp, cfp_long, by = c("Time", "Well"))
}

annotated_B22_0 <- combine_reads(reshaped_B22_0, yfp_reshaped_B22_0, cfp_reshaped_B22_0)
annotated_B22_1 <- combine_reads(reshaped_B22_1, yfp_reshaped_B22_1, cfp_reshaped_B22_1)
annotated_B22_2 <- combine_reads(reshaped_B22_2, yfp_reshaped_B22_2, cfp_reshaped_B22_2)
annotated_B22_3 <- combine_reads(reshaped_B22_3, yfp_reshaped_B22_3, cfp_reshaped_B22_3)
annotated_B49_0 <- combine_reads(reshaped_B49_0, yfp_reshaped_B49_0, cfp_reshaped_B49_0)
annotated_B49_1 <- combine_reads(reshaped_B49_1, yfp_reshaped_B49_1, cfp_reshaped_B49_1)
annotated_B49_2 <- combine_reads(reshaped_B49_2, yfp_reshaped_B49_2, cfp_reshaped_B49_2)
annotated_B49_3 <- combine_reads(reshaped_B49_3, yfp_reshaped_B49_3, cfp_reshaped_B49_3)

# The control plate additionally receives the plate layout, matched on Well.
annotated_E <- combine_reads(reshaped_E, yfp_reshaped_E, cfp_reshaped_E)
annotated_E <- inner_join(annotated_E, plate_info, by = "Well", copy = TRUE)

# Label every row of the eight evolution tables with its promoter variant
# (0-3) and its strain background (22 or 49), one table at a time.
annotated_B22_0$promoter <- rep(0, nrow(annotated_B22_0))
annotated_B22_0$strain <- rep(22, nrow(annotated_B22_0))

annotated_B22_1$promoter <- rep(1, nrow(annotated_B22_1))
annotated_B22_1$strain <- rep(22, nrow(annotated_B22_1))

annotated_B22_2$promoter <- rep(2, nrow(annotated_B22_2))
annotated_B22_2$strain <- rep(22, nrow(annotated_B22_2))

annotated_B22_3$promoter <- rep(3, nrow(annotated_B22_3))
annotated_B22_3$strain <- rep(22, nrow(annotated_B22_3))

annotated_B49_0$promoter <- rep(0, nrow(annotated_B49_0))
annotated_B49_0$strain <- rep(49, nrow(annotated_B49_0))

annotated_B49_1$promoter <- rep(1, nrow(annotated_B49_1))
annotated_B49_1$strain <- rep(49, nrow(annotated_B49_1))

annotated_B49_2$promoter <- rep(2, nrow(annotated_B49_2))
annotated_B49_2$strain <- rep(49, nrow(annotated_B49_2))

annotated_B49_3$promoter <- rep(3, nrow(annotated_B49_3))
annotated_B49_3$strain <- rep(49, nrow(annotated_B49_3))
#E has a plate info added with this 


###### Group data ###############################################

# Group every annotated table by time point, well, strain and promoter.
grouped_B22_0 <- group_by(annotated_B22_0, Time, Well, strain, promoter)
grouped_B22_1 <- group_by(annotated_B22_1, Time, Well, strain, promoter)
grouped_B22_2 <- group_by(annotated_B22_2, Time, Well, strain, promoter)
grouped_B22_3 <- group_by(annotated_B22_3, Time, Well, strain, promoter)
grouped_B49_0 <- group_by(annotated_B49_0, Time, Well, strain, promoter)
grouped_B49_1 <- group_by(annotated_B49_1, Time, Well, strain, promoter)
grouped_B49_2 <- group_by(annotated_B49_2, Time, Well, strain, promoter)
grouped_B49_3 <- group_by(annotated_B49_3, Time, Well, strain, promoter)
grouped_E <- group_by(annotated_E, Time, Well, strain, promoter)


#### combine data - all but E ctr
# dplyr::bind_rows() stacks the grouped tables and returns a consistently
# grouped result. Base rbind() is not aware of dplyr's grouping metadata, so
# it is avoided for grouped tables.
grouped_All <- dplyr::bind_rows(
  grouped_B22_0, grouped_B22_1, grouped_B22_2, grouped_B22_3,
  grouped_B49_0, grouped_B49_1, grouped_B49_2, grouped_B49_3
)

################## PLOTS ##########################
#install.packages("ggplot2")
library(ggplot2)
#install.packages("Hmisc")
library("Hmisc")
greys <- c("black", "#4D4D4D", "#888888", "#AEAEAE", "#CCCCCC")
library("Rmisc")
###############  ############### ###############
# Mean YFP and mean CFP per promoter on control plate E at Time == 0, followed
# by their ratio (printed).
e_at_time_zero <- subset(grouped_E, Time == 0)
y <- tapply(e_at_time_zero$YFP, e_at_time_zero$promoter, mean)
c <- tapply(e_at_time_zero$CFP, e_at_time_zero$promoter, mean)
y / c


#### overview plot
# FOLD CHANGES: CFP and YFP are divided by the mean CFP / mean YFP of the
# E-control (=no gal, ie ancestral fluor.) and shown on log-transformed axes.
plot <- ggplot() +
  # xlim(0,55)+
  # ylim(0,20)+
  scale_x_continuous(trans = "log", breaks = c(-1, 0, 1, 10, 100), limits = c(NA, 100)) +
  scale_y_continuous(trans = "log", breaks = c(-1, 0, 1, 10, 50), limits = c(NA, 50)) +
  theme_bw() + # no grey background
  theme(legend.position = "none") +
  theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank(), text = element_text(size = 15))

# normalization constants taken from the control plate
n <- mean(grouped_E$CFP)
ny <- mean(grouped_E$YFP)

# One panel: per-well trajectories (lines) and individual reads (points) of an
# IS+ table and an IS- table. Only the IS- layers map a colour ("IS1C-").
# The line layers set a constant alpha = 0.5 in addition to the mapped alpha.
overview_panel <- function(is_plus, is_minus) {
  plot +
    geom_line(data = is_plus, aes(x = (CFP / n), y = (YFP / ny), group = Well, alpha = -Time), size = 0.5, alpha = 0.5) +
    geom_line(data = is_minus, aes(x = (CFP / n), y = (YFP / ny), color = "IS1C-", group = Well, alpha = -Time), size = 0.5, alpha = 0.5) +
    geom_point(data = is_plus, aes(x = (CFP / n), y = (YFP / ny), alpha = -Time), size = 0.5) +
    geom_point(data = is_minus, aes(x = (CFP / n), y = (YFP / ny), color = "IS1C-", alpha = -Time), size = 0.5)
}

b <- overview_panel(grouped_B22_1, grouped_B49_1)
b
c <- overview_panel(grouped_B22_2, grouped_B49_2)
c

a <- overview_panel(grouped_B22_0, grouped_B49_0)
a
d <- overview_panel(grouped_B22_3, grouped_B49_3)
d

# control plate on the same axes (single table, no colour mapping)
e <- plot +
  geom_line(data = grouped_E, aes(x = (CFP / n), y = (YFP / ny), group = Well, alpha = -Time), size = 0.5, alpha = 0.5) +
  geom_point(data = grouped_E, aes(x = (CFP / n), y = (YFP / ny), alpha = -Time), size = 0.5)
e
multiplot(a, b, c, d, cols = 1)


## IT030
# growth: OD600 against Time on the control plate, coloured by promoter and
# with the point shape given by strain
ggplot(
  data = grouped_E,
  aes(x = Time, y = OD600, group = Well, color = as.factor(promoter), shape = as.factor(strain))
) +
  geom_point() +
  theme_bw() + # no grey background
  theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank(), text = element_text(size = 15))

## CFP in both backgrounds
# CFP/OD600 over time; one panel per promoter, both strain backgrounds overlaid
plot <- ggplot() +
  ylim(0, 22000) +
  theme_bw() +
  scale_color_manual(values = c("red", "black")) +
  theme(legend.position = "none") +
  theme(axis.title.x = element_blank(), axis.title.y = element_blank()) +
  theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank(), text = element_text(size = 15))

# Panel for one promoter: strain 22 rows are drawn with colour key "IS+",
# strain 49 rows with colour key "IS-".
cfp_panel <- function(promoter_id) {
  plot +
    geom_line(
      data = subset(grouped_All, promoter == promoter_id & strain == "22"),
      aes(x = Time, y = CFP / OD600, group = Well, color = "IS+")
    ) +
    geom_line(
      data = subset(grouped_All, promoter == promoter_id & strain == "49"),
      aes(x = Time, y = CFP / OD600, group = Well, color = "IS-")
    )
}

a <- cfp_panel("0")
b <- cfp_panel("1")
c <- cfp_panel("2")
d <- cfp_panel("3")
# control plate, line type by promoter (built but not part of the multiplot)
e <- plot + geom_line(data = subset(grouped_E), aes(x = Time, y = CFP / OD600, group = Well, linetype = as.factor(promoter)))
multiplot(a, b, c, d, cols = 1)

## YFP in both backgrounds
# YFP/OD600 over time; one panel per promoter, coloured by strain
plot <- ggplot() +
  ylim(0, 41000) +
  theme_bw() +
  scale_color_manual(values = c("black", "red")) +
  theme(legend.position = "none") +
  theme(axis.title.x = element_blank(), axis.title.y = element_blank()) +
  theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank(), text = element_text(size = 15))

# Panel for one promoter: one line layer for strain 22, one for strain 49.
yfp_panel <- function(promoter_id) {
  plot +
    geom_line(
      data = subset(grouped_All, promoter == promoter_id & strain == "22"),
      aes(x = Time, y = YFP / OD600, group = Well, color = as.factor(strain))
    ) +
    geom_line(
      data = subset(grouped_All, promoter == promoter_id & strain == "49"),
      aes(x = Time, y = YFP / OD600, group = Well, color = as.factor(strain))
    )
}

a <- yfp_panel("0")
b <- yfp_panel("1")
c <- yfp_panel("2")
d <- yfp_panel("3")
# control plate, line type by promoter (built but not part of the multiplot)
e <- plot + geom_line(data = subset(grouped_E), aes(x = Time, y = YFP / OD600, group = Well, linetype = as.factor(promoter)))
multiplot(a, b, c, d, cols = 1)


#finding out whether we can see indications for YFP higher in p0 and p0-2:
## plot #49 (IS-) for timepoint 1: YFP against OD600 at Time == 0, one colour
## key per p0 variant. The plot is stored in yo49 and not printed here.
is_minus_p0_start <- subset(grouped_B49_0, Time == 0)
is_minus_p01_start <- subset(grouped_B49_1, Time == 0)
is_minus_p02_start <- subset(grouped_B49_2, Time == 0)
is_minus_p03_start <- subset(grouped_B49_3, Time == 0)

yo49 <- ggplot() +
  #xlim(0.1,0.8)+ylim(1000,3000)+
  theme_bw() + # no grey background
  theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank(), text = element_text(size = 15)) +
  geom_point(data = is_minus_p0_start, aes(x = OD600, y = (YFP), group = Well, color = "p0")) +
  geom_point(data = is_minus_p01_start, aes(x = OD600, y = (YFP), group = Well, color = "p0-1")) +
  geom_point(data = is_minus_p02_start, aes(x = OD600, y = (YFP), group = Well, color = "p0-2")) +
  geom_point(data = is_minus_p03_start, aes(x = OD600, y = (YFP), group = Well, color = "p0-3"))
# the reason for this difference is just higher OD in p0-2

############### ############## ###############
## for paper plot #49 (IS-) for timepoint 1
# OD600 at Time == 0 for the four IS- tables, jittered at x = 0..3, plus a bar
# at the mean OD600 of p0.
# The bar needs a data frame: mean() of a whole data frame returns NA, which
# ggplot2 cannot use as layer data, so the mean of the OD600 column is computed
# explicitly and drawn with stat = "identity" (geom_bar's default stat counts
# rows and does not accept a y aesthetic).
p0_mean_od <- data.frame(OD600 = mean(subset(grouped_B49_0, Time == 0)$OD600))
ggplot() +
  #xlim(0.1,0.8)+ylim(1000,3000)+
  theme_bw() + # no grey background
  theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank(), text = element_text(size = 15)) +
  geom_jitter(data = subset(grouped_B49_0, Time == 0), aes(x = 0, y = (OD600), group = Well, color = "p0"), alpha = 0.1) +
  # fill = NA draws only the outline so the bar does not cover the points
  geom_bar(data = p0_mean_od, aes(x = 0, y = (OD600), color = "p0"), stat = "identity", fill = NA) +
  #geom_errorbar(data=(subset(grouped_B49_0,Time==0), aes(x=0, y=(OD600), group=Well, color="p0"))+
  geom_jitter(data = subset(grouped_B49_1, Time == 0), aes(x = 1, y = (OD600), group = Well, color = "p0-1"), alpha = 0.1) +
  geom_jitter(data = subset(grouped_B49_2, Time == 0), aes(x = 2, y = (OD600), group = Well, color = "p0-2"), alpha = 0.1) +
  geom_jitter(data = subset(grouped_B49_3, Time == 0), aes(x = 3, y = (OD600), group = Well, color = "p0-3"), alpha = 0.1)

##group and take mean
# Stack the four IS- tables. dplyr::bind_rows() is used instead of base
# rbind(), which is not aware of dplyr's grouping metadata.
grouped_B49_0123 <- dplyr::bind_rows(grouped_B49_0, grouped_B49_1, grouped_B49_2, grouped_B49_3)
library(plyr)
# Mean and sd of OD600 per promoter at Time == 0:
#   r2 - evolution plates (IS-), r3 - control plate E restricted to strain 49.
# summarize is referenced as plyr::summarize because other attached packages
# (dplyr, Hmisc) export a function of the same name.
r2 <- ddply(subset(grouped_B49_0123, Time == 0), .(promoter), plyr::summarize, mean = mean(OD600), sd = sd(OD600))
r3 <- ddply(subset(grouped_E, Time == 0 & strain == 49), .(promoter), plyr::summarize, mean = mean(OD600), sd = sd(OD600))

#Figure 6 Supplement/ Supplementary Figure 5C
# Both figures share the y range and theme; they differ only in the data and
# in the summary table (r2 / r3) that supplies mean +/- sd for the crossbars.
crossbar_style <- list(
  ylim(0.1, 0.8),
  theme_bw(), # no grey background
  theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank(), text = element_text(size = 15))
)

#points with geom_crossbar #IS- 24h in gal
is_minus_gal_start <- subset(grouped_B49_0123, Time == 0)
ggplot(data = is_minus_gal_start, aes(x = promoter, y = OD600)) +
  crossbar_style +
  geom_jitter(alpha = 0.2) +
  geom_crossbar(data = r2, aes(x = promoter, y = mean, ymin = mean - sd, ymax = mean + sd), width = 0.9)

#points with geom_crossbar #IS- 24h in control medium without gal
is_minus_ctr_start <- subset(grouped_E, Time == 0 & strain == 49)
ggplot(data = is_minus_ctr_start, aes(x = promoter, y = OD600)) +
  crossbar_style +
  geom_jitter(alpha = 0.2) +
  geom_crossbar(data = r3, aes(x = promoter, y = mean, ymin = mean - sd, ymax = mean + sd), width = 0.9)

##how many replicates?
length(subset(grouped_E, Time == 0 & strain == 49 & promoter == 3)$Well)
plate_info

##supplement: pairwise t.test (default settings, see ?t.test) on OD600 at
## Time == 0 between the IS- p0 variants
# OD600 values at Time == 0 for one promoter variant of the IS- tables
od_at_time_zero <- function(promoter_id) {
  subset(grouped_B49_0123, Time == 0 & promoter == promoter_id)$OD600
}

#p0 versus p0-1 **
p1 <- t.test(od_at_time_zero(0), od_at_time_zero(1))
#p0 versus p0-2 **
p2 <- t.test(od_at_time_zero(0), od_at_time_zero(2))
#p0-1 versus p0-3 **
p3 <- t.test(od_at_time_zero(1), od_at_time_zero(3))
#p0 versus p0-3 **
p4 <- t.test(od_at_time_zero(0), od_at_time_zero(3))
#p0-1 versus p0-2 **
p5 <- t.test(od_at_time_zero(1), od_at_time_zero(2))
#p0-2 versus p0-3 **
p6 <- t.test(od_at_time_zero(2), od_at_time_zero(3))

#adjust against multiple testing. all still **
# The number of comparisons is left to p.adjust(), which defaults to the
# length of the p-value vector (6). Passing n = length(p) from here would read
# an unrelated global `p` (or fail if none exists).
pairwise_p_values <- c(p1$p.value, p2$p.value, p3$p.value, p4$p.value, p5$p.value, p6$p.value)
p.adjust(pairwise_p_values, method = "bonferroni")


## violin plot
# distribution of OD600 at Time == 0 per promoter (IS- tables), filled by promoter
ggplot(
  data = subset(grouped_B49_0123, Time == 0),
  aes(x = promoter, y = OD600, group = promoter, fill = promoter)
) +
  #xlim(0.1,0.8)+ylim(1000,3000)+
  theme_bw() + # no grey background
  theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank(), text = element_text(size = 15)) +
  geom_violin()
  





### Figure 6A plots
# FOLD CHANGES: CFP and YFP divided by the mean CFP / mean YFP of the E-control
plot <- ggplot() +
  xlim(0, 22) +
  ylim(0, 9) +
  theme_bw() + # no grey background
  theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank(), text = element_text(size = 15))


# normalization constants and the control layer (grey crosses) shared by all panels
n <- mean(grouped_E$CFP)
ny <- mean(grouped_E$YFP)
ctr <- geom_point(data = subset(grouped_E), aes(x = (CFP / n), y = (YFP / ny)), alpha = 0.5, shape = 3, col = "grey")

# Panel layout used for p0, p0-1 and p0-3: IS+ points, then IS- points
# (colour key "IS1C-"), then the control layer on top; legend hidden.
fold_change_panel <- function(is_plus, is_minus) {
  plot +
    geom_point(data = is_plus, aes(x = (CFP / n), y = (YFP / ny), alpha = (-Time)), size = 1.5) +
    geom_point(data = is_minus, aes(x = (CFP / n), y = (YFP / ny), alpha = (-Time), color = "IS1C-"), size = 1.5) +
    ctr +
    theme(legend.position = "none")
}

a <- fold_change_panel(grouped_B22_0, grouped_B49_0)
b <- fold_change_panel(grouped_B22_1, grouped_B49_1)
# p0-2 differs from the other panels: IS- is drawn first and the points are smaller
c <- plot +
  geom_point(data = grouped_B49_2, aes(x = (CFP / n), y = (YFP / ny), alpha = (-Time), color = "IS1C-"), size = 1) +
  theme(legend.position = "none") +
  geom_point(data = grouped_B22_2, aes(x = (CFP / n), y = (YFP / ny), alpha = (-Time)), size = 1) +
  ctr
d <- fold_change_panel(grouped_B22_3, grouped_B49_3)
multiplot(a, b, c, d, cols = 2)





#### Figure 6B plots
## YFP/CFP plot #22+49 p0,p2
# No xlim() in this chain: ggplot2 keeps only the last x scale that is added,
# so an xlim() placed before scale_x_continuous() is discarded (with a
# message). To clip the x axis, pass limits = to scale_x_continuous() instead.
plot <- ggplot() +
  ylim(0, 28.5) +
  scale_x_continuous(breaks = seq(0, 10, 1)) +
  theme_bw() +
  theme(legend.position = "none") +
  theme(axis.title.x = element_blank(), axis.title.y = element_blank()) +
  theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank(), text = element_text(size = 15))
# control plate wells in grey underneath every panel
ctr <- plot + geom_line(data = grouped_E, aes(x = Time + 1, y = YFP / CFP, group = Well), color = "grey", alpha = 0.4)
a <- ctr + geom_line(data = grouped_B22_0, aes(x = Time + 1, y = YFP / CFP, group = Well), color = "black")
b <- ctr + geom_line(data = grouped_B22_2, aes(x = Time + 1, y = YFP / CFP, group = Well), color = "black")
c <- ctr + geom_line(data = grouped_B49_0, aes(x = Time + 1, y = YFP / CFP, group = Well), color = "black")
d <- ctr + geom_line(data = grouped_B49_2, aes(x = Time + 1, y = YFP / CFP, group = Well), color = "black")
multiplot(a, b, c, d, cols = 2)
#paper plot (Time +1 as first measurement is after 24h)
multiplot(b, d, cols = 2)

# Figure 6 supplement plot/ Supplementary Figure 5A
#(Time +1 as first measurement is after 24h)
# Is there a higher level of amplification (CFP) in p0-0 versus p0-2 (IS+)?
p <- ggplot() +
  ylim(0, 21000) +
  scale_x_continuous(breaks = seq(0, 10, 1)) +
  theme_bw() + # no grey background
  theme(
    legend.position = "none",
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    text = element_text(size = 15)
  )
# control plate wells in grey, evolution plate wells in black on top
ctr <- geom_line(data = grouped_E, aes(x = Time + 1, y = CFP / OD600, group = Well), color = "grey", alpha = 0.9)
p02 <- p + ctr + geom_line(data = grouped_B22_2, aes(x = Time + 1, y = CFP / OD600, group = Well), color = "black")
p0 <- p + ctr + geom_line(data = grouped_B22_0, aes(x = Time + 1, y = CFP / OD600, group = Well), color = "black")
multiplot(p0, p02, cols = 2)
##seems very much like it
##seems very much like it

#wells with increased YFP+/CFP################
plot <- ggplot() +
  ylim(0, 27.5) +
  xlim(0, 10) +
  theme_bw() + # no grey background
  theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank(), text = element_text(size = 15))

# Each plot below shows all wells of one table faintly (colour key
# "all wells") and highlights a hand-picked set of wells by name.
#chose Y/C > 11 (bit arbitrary, but see plots)
# 22-0 C6 is staying high (by eye)
plot +
  geom_line(data = grouped_B22_0, aes(x = Time, y = YFP / CFP, group = Well, color = "all wells"), size = 1, alpha = 0.3) +
  geom_line(data = subset(grouped_B22_0, Well == "C6"), aes(x = Time, y = YFP / CFP, group = Well, color = Well), size = 1)

#22-2 A11 B10 C4  E1  E9  F4
wells_high_22_2 <- c("A11", "B10", "C4", "E1", "E9", "F4")
plot +
  geom_line(data = grouped_B22_2, aes(x = Time, y = YFP / CFP, group = Well, color = "all wells"), size = 1, alpha = 0.3) +
  geom_line(data = subset(grouped_B22_2, Well %in% wells_high_22_2), aes(x = Time, y = YFP / CFP, group = Well, color = Well), size = 1)
# wells whose YFP/CFP exceeds 11 at any read
unique(grouped_B22_2[which(grouped_B22_2$YFP / grouped_B22_2$CFP > 11), ]$Well)

#49-0
wells_high_49_0 <- c("B5", "B8", "C8", "F2", "G10", "H4", "H5")
plot +
  geom_line(data = grouped_B49_0, aes(x = Time, y = YFP / CFP, group = Well, color = "all wells"), size = 1, alpha = 0.3) +
  geom_line(data = subset(grouped_B49_0, Well %in% wells_high_49_0), aes(x = Time, y = YFP / CFP, group = Well, color = Well), size = 1)
# wells whose YFP/CFP exceeds 14 at any read
unique(grouped_B49_0[which(grouped_B49_0$YFP / grouped_B49_0$CFP > 14), ]$Well)

#49-2
wells_high_49_2 <- c(
  "A7", "A8", "C3", "C5", "C11", "D7", "F2", "F3", "F6", "F10",
  "F11", "F12", "G1", "H3", "H4", "H5", "H6", "H8", "H9", "H12"
)
plot +
  geom_line(data = grouped_B49_2, aes(x = Time, y = YFP / CFP, group = Well, color = "all wells"), size = 1, alpha = 0.3) +
  geom_line(data = subset(grouped_B49_2, Well %in% wells_high_49_2), aes(x = Time, y = YFP / CFP, group = Well, color = Well), size = 1)
# wells whose YFP/CFP exceeds 11 at any read
unique(grouped_B49_2[which(grouped_B49_2$YFP / grouped_B49_2$CFP > 11), ]$Well)

#49-2: only the reads with YFP/CFP > 9, coloured by well
plot +
  geom_line(data = subset(grouped_B49_2, YFP / CFP > 9), aes(x = Time, y = YFP / CFP, group = Well, color = Well), size = 1, alpha = 0.9)
####
##############################################
# YFP/CFP of well H4 on the IS- p0 plate, one value per read (printed)
with(subset(grouped_B49_0, Well == "H4"), YFP / CFP)

## YFP/CFP plot #49
# base plot already carries the control plate wells (colour key "ctr")
plot <- ggplot() +
  ylim(0, 28) +
  xlim(0, 10) +
  theme_bw() + # no grey background
  theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank(), text = element_text(size = 10)) +
  geom_line(data = grouped_E, aes(x = Time, y = YFP / CFP, group = Well, color = "ctr"), size = 1, alpha = 0.3)

# one panel per IS- p0 variant
a <- plot + geom_line(data = grouped_B49_0, aes(x = Time, y = YFP / CFP, group = Well, color = "p0"), size = 1)
b <- plot + geom_line(data = grouped_B49_1, aes(x = Time, y = YFP / CFP, group = Well, color = "p0-1"), size = 1)
c <- plot + geom_line(data = grouped_B49_2, aes(x = Time, y = YFP / CFP, group = Well, color = "p0-2"), size = 1)
d <- plot + geom_line(data = grouped_B49_3, aes(x = Time, y = YFP / CFP, group = Well, color = "p0-3"), size = 1)
multiplot(a, b, c, d, cols = 2)
#much higher level of YFP/CFP - different y-axes


#CFP/OD for p0-2 in both backgrounds (y axis: CFP/OD600/1000)
# Base plot and grey control layer are identical for the IS+ and IS- panel,
# so they are built once.
plot <- ggplot() +
  ylim(0, 15) +
  xlim(0, 10) +
  theme_bw() + # no grey background
  theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank(), text = element_text(size = 15))
ctr <- plot + geom_line(data = grouped_E, aes(x = Time, y = CFP / OD600 / 1000, group = Well), color = "grey", alpha = 0.4)

#CFP/OD IS+ : all wells in black, six selected wells highlighted in orange
CFP_22 <- ctr +
  geom_line(data = grouped_B22_2, aes(x = Time, y = CFP / OD600 / 1000, group = Well), color = "black", size = 1) +
  geom_line(
    data = subset(grouped_B22_2, Well == "A11" | Well == "B10" | Well == "C4" | Well == "E1" | Well == "E9" | Well == "F4"),
    aes(x = Time, y = CFP / OD600 / 1000, group = Well), color = "dark orange", size = 1
  )

#CFP/OD IS-
CFP_49 <- ctr + geom_line(data = grouped_B49_2, aes(x = Time, y = CFP / OD600 / 1000, group = Well), color = "black", size = 1)

multiplot(CFP_22, CFP_49, cols = 2)

# ratio_22 and ratio_49 are not created anywhere in this script. Draw them only
# when they exist in the session, so a missing object does not stop the rest
# of the script.
if (exists("ratio_22") && exists("ratio_49")) {
  multiplot(ratio_22, ratio_49, cols = 2)
} else {
  message("ratio_22 and/or ratio_49 not found; skipping multiplot(ratio_22, ratio_49)")
}


########## p0-2 seq results 1

#IS- 
#indel/SNP: A7  C3  C5  F2  F10  H12 
# large p0 deletion: D7
# ancestral: F6, G1

#IS+
#indel/SNP: A11, B10, F4 (early rising)
#ancestral: C4, E9 (early rising)

############# p0-2 seq results 2

#sequencing plate- wells with mutations (#+ evolution plate well info!):
#G01, #H9 Y+,
#A03, #F2 Y+,
#G03, #D1 Y+,
#C05, #E2 Y+,
#E04, #A1 Y+,(bigger band!) maps to insD1 coding sequence way downstream (check borders - hopefully p02 or gal)
#E05, #E5 Y+, (gap)
#A02, #C5 Y+, (huge deletion in p02) 
#E02, #H5 Y+, (huge deletion in p02) 
## in all of these populations, the respective Y- colonies are ancestral.

#For IS- it seems 8/37 (8/17 Y+)successful sequencing runs have mutations in p02 
#For IS+ it seems 0/35 successful sequencing runs have mutations in p02


#successful seq reactions of seq plate:
# — A01 Y+ missing
# A02
# A03
# A04
# A05
# A06
# A07
# A08
# A09
# A10   (4 IS-, 5 IS+)
# 
# B01-B10 (5 IS-, 5 IS+) 
# 
# — C01 Y+ missing
# C02
# C04
# C05
# C06
# C07
# C08
# C09
# C10 (4IS-, 5 IS+)
# 
# D01-D09 (5 IS-, 4 IS+)
# 
# E01-E09 (5 IS-, 4 IS+)
# 
# F01-F09 (5 IS-, 4 IS+)
# 
# G01-G04 
# G06-G09  (4 IS-, 4 IS+)
# 
# H01-H09 (5 IS-, 4 IS+)



##  Figure 6 C plots
#### plotting results of sequencing in barcharts
# Table of sequenced colonies (tab-separated, header row); column names are
# printed for reference.
seqdf <- read.csv2("sequenced_EE25_colonies.txt", header = TRUE, sep = "\t")
names(seqdf)

seqdf$fraction <- seqdf$promoter.mutation / seqdf$total

# In the plots below, columns are referenced by bare name inside aes() and
# facet_wrap(). Writing seqdf$column there bypasses the layer data and can
# misalign values with the facets.

#total sequenced
ggplot(seqdf, aes(x = population.phenotype, y = total, fill = colony.phenotype)) +
  geom_bar(stat = "identity") +
  facet_wrap(~strain)

#plot mutants and ancestral sequences (adding up to total)
ggplot(seqdf, aes(x = strain, y = counts, fill = P0.2_sequence)) +
  geom_bar(stat = "identity") +
  facet_wrap(~colony.phenotype) +
  scale_fill_manual(values = c("grey", "dark green")) +
  theme_bw() + # no grey background
  theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank())

##different facet wrap; plot mutants and ancestral sequences (adding up to total)
ggplot(seqdf, aes(x = colony.phenotype, y = counts, fill = P0.2_sequence)) +
  geom_bar(stat = "identity") +
  facet_wrap(~strain) +
  scale_fill_manual(values = c("grey", "dark green")) +
  coord_flip() +
  theme_bw() + # no grey background
  theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank())


